This project investigates Symbiodinium communities in scleractinian corals sampled at various sites on the north and south shores of St. John, US Virgin Islands in August 2012. The ITS2 region of nrDNA isolated from coral samples was amplified and sequenced by paired-end 300bp reads on an Illumina MiSeq platform. Sequence data is processed through a custom bioinformatic pipeline and resulting OTU tables are analyzed by various multivariate statistical approaches to ask questions regarding alpha and beta diversity of Symbiodinium communities within and across coral species. The major aims of this work are to 1) characterize Symbiodinium communities in a range of coral species using high-throughput sequencing (many for the first time), and 2) to forward novel bioinformatic and statistical approaches to better understand Symbiodinium ecology.
A custom pipeline is used here to process ITS2 sequence data. Briefly, paired reads were merged using the illumina-utils and filtered to keep only those with 3 mismatches or less. Chimeras were then removed by usearch. From here, sequence data was processed using both of two approaches:
OTU tables and taxonomic assignments from both of these bioinformatic approaches are used in comparative downstream analyses.
The phyloseq package is used here to combine the OTU table, taxonomic assignments, and sample metadata into a single R data object to facilitate downstream analyses.
# Import OTU tables
OTU97 <- otu_table(read.table("data/97_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
OTU97bs <- otu_table(read.table("data/97_otus_bysample.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
OTU100 <- otu_table(read.table("data/100_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
# Import taxonomy
TAX97 <- tax_table(as.matrix(read.table("data/97_otus_tax_assignments.txt", sep="\t", row.names=2,
col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
TAX97bs <- tax_table(as.matrix(read.table("data/97_otus_bysample_tax_assignments.txt", sep="\t", row.names=2,
col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
TAX100 <- tax_table(as.matrix(read.table("data/100_otus_tax_assignments.txt", sep="\t", row.names=2,
col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
# Import sample data
SAM <- sample_data(read.delim("data/mapping_file.txt", sep="\t", header=T, row.names=1))
# Build phyloseq objects
phy97 <- phyloseq(OTU97, TAX97, SAM)
phy97bs <- phyloseq(OTU97bs, TAX97bs, SAM)
phy100 <- phyloseq(OTU100, TAX100, SAM)
# Compute summary statistics
stats <- function(phy) {
return(
data.frame(
'Total count in OTU table'=sum(otu_table(phy)),
'Number of OTUs'=ntaxa(phy),
'Range of OTU counts'=paste0(range(taxa_sums(phy)), collapse=" - "),
'Number of singleton OTUs'=length(taxa_sums(phy)[taxa_sums(phy) <= 1]),
'Number of samples'=nsamples(phy),
'Range of reads per sample'=paste0(range(sample_sums(phy)), collapse=" - "),
'Arithmetic mean (±SD) reads per sample'=paste(as.integer(mean(sample_sums(phy))),
as.integer(sd(sample_sums(phy))), sep=" ± "),
'Geometric mean (±SD) reads per sample'=paste(as.integer(exp(mean(log(sample_sums(phy))))),
as.integer(exp(sd(log(sample_sums(phy))))), sep=" ± "),
check.names=F)
)
}
stats97 <- data.frame(`97% OTUs`=t(stats(phy97)), check.names=F)
stats97bs <- data.frame(`97% within-sample OTUs`=t(stats(phy97bs)), check.names=F)
stats100 <- data.frame(`100% OTUs`=t(stats(phy100)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100)), plot=F)
par(mfrow=c(2, 3), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% within-sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
# Create stats table
knitr::kable(cbind(stats97, stats97bs, stats100))
| 97% OTUs | 97% within-sample OTUs | 100% OTUs | |
|---|---|---|---|
| Total count in OTU table | 1492736 | 1492692 | 1056483 |
| Number of OTUs | 135 | 174 | 41449 |
| Range of OTU counts | 2 - 739709 | 2 - 474177 | 2 - 171487 |
| Number of singleton OTUs | 0 | 0 | 0 |
| Number of samples | 84 | 84 | 84 |
| Range of reads per sample | 3 - 169977 | 3 - 169975 | 1 - 112823 |
| Arithmetic mean (±SD) reads per sample | 17770 ± 20982 | 17770 ± 20982 | 12577 ± 14480 |
| Geometric mean (±SD) reads per sample | 9427 ± 5 | 9400 ± 5 | 6451 ± 6 |
# Set threshold number of reads
sn <- 0
# Remove samples with fewer reads than threshold
phy97.f <- prune_samples(sample_sums(phy97)>=sn, phy97)
phy97bs.f <- prune_samples(sample_sums(phy97bs)>=sn, phy97bs)
phy100.f <- prune_samples(sample_sums(phy100)>=sn, phy100)
# Set threshold count
n <- 10
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Refilter samples in case removal of OTUs caused any samples to drop below threshold
sn <- 200
phy97.f <- prune_samples(sample_sums(phy97.f)>=sn, phy97.f)
phy97bs.f <- prune_samples(sample_sums(phy97bs.f)>=sn, phy97bs.f)
phy100.f <- prune_samples(sample_sums(phy100.f)>=sn, phy100.f)
# Compute summary statistics
stats97.f <- data.frame(`97% OTUs`=t(stats(phy97.f)), check.names=F)
stats97bs.f <- data.frame(`97% within-sample OTUs`=t(stats(phy97bs.f)), check.names=F)
stats100.f <- data.frame(`100% OTUs`=t(stats(phy100.f)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97.f)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs.f)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100.f)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97.f)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs.f)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100.f)), plot=F)
par(mfrow=c(2, 3), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
# Create stats table
knitr::kable(cbind(stats97.f, stats97bs.f, stats100.f))
| 97% OTUs | 97% within-sample OTUs | 100% OTUs | |
|---|---|---|---|
| Total count in OTU table | 1492454 | 1492337 | 944975 |
| Number of OTUs | 102 | 117 | 4727 |
| Range of OTU counts | 10 - 739699 | 0 - 474176 | 10 - 171442 |
| Number of singleton OTUs | 0 | 1 | 0 |
| Number of samples | 80 | 80 | 80 |
| Range of reads per sample | 706 - 169958 | 707 - 169964 | 485 - 97036 |
| Arithmetic mean (±SD) reads per sample | 18655 ± 21113 | 18654 ± 21114 | 11812 ± 12750 |
| Geometric mean (±SD) reads per sample | 13165 ± 2 | 13161 ± 2 | 8152 ± 2 |
# Convert to proportion (relative abundance)
phy97.f.p <- transform_sample_counts(phy97.f, function(x) x/sum(x))
phy97bs.f.p <- transform_sample_counts(phy97bs.f, function(x) x/sum(x))
phy100.f.p <- transform_sample_counts(phy100.f, function(x) x/sum(x))
# Apply transformation function
transform <- function(x) sqrt(x/sum(x)) # Set transformation function
phy97.f.t <- transform_sample_counts(phy97.f, transform) # Transform data
phy97bs.f.t <- transform_sample_counts(phy97bs.f, transform)
phy100.f.t <- transform_sample_counts(phy100.f, transform)
Here the composition of each sample is plotted as a horizontal bar, sorted by species and shore. Each segment of the bar represents an individual OTU. In the lefthand plot, all OTUs are colored by clade, in the right OTU, all OTUs are given a unique color.
# Define function to plot Symbiodinium community composition
composition <- function(phy, col, legend=T) {
samdat <- data.frame(sample_data(phy))
samdat$Genus <- factor(samdat$Genus, levels=rev(levels(samdat$Genus)))
samdat$Species <- factor(samdat$Species, levels=rev(levels(samdat$Species)))
samdat$Shore <- factor(samdat$Shore, levels=rev(levels(samdat$Shore)))
samdat <- samdat[with(samdat, order(Genus, Species, Shore)), ]
typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$Subtype),
rownames(samdat)])
shorebreaks <- c(as.character(samdat$Shore), "X")==c("X", as.character(samdat$Shore))
shorebreaks <- which(shorebreaks==F) - 1
spbreaks <- c(which(duplicated(samdat$Species)==F) - 1, nrow(samdat))
# Make Barplot
barplot(typerelabund, horiz=T, space=0, axes=F,axisnames=F, yaxs="i", col=col)
rect(0, 0, par("usr")[2], par("usr")[4], lwd=1, xpd=T)
axis(side=1, at=seq(0, 1, 0.1), line=0, tck=-0.025, mgp=c(0,0.25,0), cex.axis=0.7)
mtext(side=1, "Relative abundance", cex=0.7, line=1)
# Add legend
if (legend==T) {
legend(x=par("usr")[2]/2, y=par("usr")[4], xjust=0.5, yjust=0.25, horiz=T, bty="n", xpd=T,
cex=0.7, legend=c("A", "B", "C", "D", "F", "G"), fill=taxcolors, x.intersp=0.5)
legend(x=par("usr")[2]-0.03, y=par("usr")[4], xjust=0, yjust=0.1, bty="n", xpd=T, cex=0.6,
pt.cex=0.6, legend=c("N Shore", "S Shore"), fill=c(0, "gray40"), y.intersp=0.7,
x.intersp=0.3)
}
# Add grouping bars for Shore
for (i in 1:length(shorebreaks)) {
lines(c(0, 1), c(shorebreaks[i], shorebreaks[i]), lty=2, lwd=0.25)
rect(1.01, shorebreaks[i], 1.04, shorebreaks[i+1], col=rep(c("gray40", 0), 50)[i],
lwd=0.25, xpd=T)
}
# Add lines to separate species and species names
for (i in 1:length(spbreaks)) {
lines(c(0, 1.07), c(spbreaks[i], spbreaks[i]), xpd=T, type="l", lwd=0.4)
text(1.03, (spbreaks[i] + spbreaks[i+1]) / 2, xpd=T, pos=4, cex=0.6,
labels=paste(samdat$Genus[which(duplicated(samdat$Species)==F)][i], "\n",
samdat$Species[which(duplicated(samdat$Species)==F)][i], sep=""))
}
}
par(mfrow=c(1,2), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)
# Plot composition of 97% OTUs colored by clade
composition(phy97.f.p, col=taxcolors[data.frame(tax_table(phy97.f.p))[order(data.frame(tax_table(phy97.f.p))$Subtype), ]$Clade], legend=T)
# Plot composition of 97% OTUs colored by OTU
composition(phy97.f.p, col=rainbow(ntaxa(phy97.f.p)), legend=F)
# Plot composition of 97% within-sample OTUs colored by clade
composition(phy97bs.f.p, col=taxcolors[factor(data.frame(tax_table(phy97bs.f.p))[order(data.frame(tax_table(phy97bs.f.p))$Subtype), ]$Clade, levels=c("A","B","C","D","F","G"))], legend=T)
# Plot composition of 97% within-sample OTUs colored by OTU
composition(phy97bs.f.p, col=rainbow(ntaxa(phy97bs.f.p)), legend=F)
par(mfrow=c(1,2), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)
# Plot composition of 100% OTUs colored by clade
composition(phy100.f.p, col=taxcolors[data.frame(tax_table(phy100.f.p))[order(data.frame(tax_table(phy100.f.p))$Subtype), ]$Clade], legend=T)
# Plot composition of 100% OTUs colored by OTU
composition(phy100.f.p, col=rainbow(ntaxa(phy100.f.p)), legend=F)
Whether Symbiodinium communities differ among species is evaluated using permutational analysis of variance (PERMANOVA).
# PERMANOVA for difference among species
sppdiffs <- function(phy) {
permanova.spp.t <- adonis(phyloseq::distance(phy, "bray") ~ Species,
data=as(sample_data(phy), "data.frame"), permutations=9999)
permanova.spp.t
# Pairwise PERMANOVA tests for differences between species
dmat <- as(phyloseq::distance(phy, "bray"), "matrix")
df <- as(sample_data(phy), "data.frame")
pairs <- data.frame(t(combn(levels(df$Species), 2)), stringsAsFactors=F)
p.pairs <- setNames(rep(NA, nrow(pairs)), with(pairs, paste(X1, X2, sep='-')))
for (i in 1:nrow(pairs)) {
dfs <- subset(df, Species %in% c(pairs[i,1], pairs[i,2]))
dmats <- dmat[rownames(dfs), rownames(dfs)]
set.seed(152470)
permanova <- adonis(dmats ~ Species, data=dfs, permutations=999)
p.pairs[i] <- permanova$aov.tab$`Pr(>F)`[1]
}
# Return letters signifying differences
return(multcompLetters(p.pairs))
}
sppdiffs97 <- sppdiffs(phy97.f.t)
sppdiffs97bs <- sppdiffs(phy97bs.f.t)
sppdiffs100 <- sppdiffs(phy100.f.t)
sppdiffs <- data.frame("97% OTUs"=sppdiffs97$Letters, "97% within-sample OTUs"=sppdiffs97bs$Letters,
"100% OTUs"=sppdiffs100$Letters, check.names=F)
knitr::kable(sppdiffs, caption="Species not sharing a letter are significantly different (p < 0.05)")
| 97% OTUs | 97% within-sample OTUs | 100% OTUs | |
|---|---|---|---|
| alcicornis | a | ab | a |
| annularis | bc | ab | b |
| astreoides | d | c | c |
| cavernosa | e | d | d |
| cylindrus | f | a | e |
| fragum | a | ab | f |
| furcata | b | e | g |
| radians | g | f | h |
| siderea | g | d | d |
| strigosa | ac | b | b |
Whether Symbiodinium communities differ between north vs. south shores within species is tested here using PERMANOVA.
shorediffs <- function(phy) {
# Compute within- and between-shore statistics for each Species
shorestats <- data.frame(matrix(dimnames=list(levels(data.frame(sample_data(phy))$Species),
c("Species", "n", "overall", "within",
"between", "R2", "bd", "p")), ncol=8, nrow=10))
for (i in 1:nlevels(data.frame(sample_data(phy))$Species)) {
sp <<- levels(data.frame(sample_data(phy))$Species)[i]
shorestats$Species[i] <- sp
phy.species <- subset_samples(phy, Species==sp) # Set temp phyloseq object
shorestats$n[i] <- nsamples(phy.species)
md <- meandist(phyloseq::distance(phy.species, "bray"),
sample_data(phy.species)$Shore)
shorestats$within[i] <- summary(md)$W # Weighted mean dissimilarity within both Pools
shorestats$between[i] <- summary(md)$B # Mean dissimilarity between Pools
shorestats$overall[i] <- summary(md)$D # Overall dissimilarity
if (nlevels(as(sample_data(phy.species), "data.frame")$Shore) > 1) {
#set.seed(70235)
set.seed(43789)
permanova <- adonis(phyloseq::distance(phy.species, "bray") ~ Shore,
data=as(sample_data(phy.species), "data.frame"), permutations=999)
shorestats$R2[i] <- permanova$aov.tab$"R2"[1] # PERMANOVA partial R-squared
shorestats$p[i] <- permanova$aov.tab$"Pr(>F)"[1] # PERMANOVA p-value
bd <- betadisper(phyloseq::distance(phy.species, "bray"),
group=as(sample_data(phy.species), "data.frame")$Shore)
shorestats$bd[i] <- TukeyHSD(bd)$group[4]
} else {
shorestats$R2[i] <- NA
shorestats$p[i] <- NA
shorestats$bd[i] <- NA
}
}
return(shorestats)
}
shorestats97 <- shorediffs(phy97.f.t)
shorestats97bs <- shorediffs(phy97bs.f.t)
shorediffs(phy97bs.f.p)
## Species n overall within between R2 bd p
## alcicornis alcicornis 10 0.60398 0.65378 0.56414 0.037 1.00 0.798
## annularis annularis 4 0.63204 0.63204 NaN NA NA NA
## astreoides astreoides 5 0.00433 0.00433 NaN NA NA NA
## cavernosa cavernosa 7 0.00068 0.00073 0.00065 0.183 0.36 0.426
## cylindrus cylindrus 8 0.02571 0.02696 0.02463 0.096 0.87 0.848
## fragum fragum 8 0.25144 0.30904 0.20152 0.086 0.48 0.721
## furcata furcata 9 0.70295 0.55499 0.82132 0.345 0.82 0.040
## radians radians 10 0.02971 0.03101 0.02867 0.064 0.90 0.944
## siderea siderea 10 0.51237 0.54145 0.48910 0.061 0.54 0.484
## strigosa strigosa 9 0.78854 0.71022 0.85119 0.242 0.21 0.075
shorestats100 <- shorediffs(phy100.f.t)
knitr::kable(shorestats97, row.names=F,
caption="97% OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.13 | 0.13 | 0.13 | 0.11 | 0.17 | 0.44 |
| annularis | 4 | 0.59 | 0.59 | NaN | NA | NA | NA |
| astreoides | 5 | 0.07 | 0.07 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.13 | 0.12 | 0.13 | 0.26 | 0.24 | 0.07 |
| cylindrus | 8 | 0.13 | 0.10 | 0.14 | 0.27 | 0.20 | 0.03 |
| fragum | 8 | 0.22 | 0.26 | 0.18 | 0.08 | 0.40 | 0.85 |
| furcata | 9 | 0.54 | 0.37 | 0.67 | 0.53 | 0.20 | 0.07 |
| radians | 10 | 0.13 | 0.14 | 0.13 | 0.06 | 0.96 | 0.84 |
| siderea | 10 | 0.30 | 0.28 | 0.32 | 0.21 | 0.19 | 0.02 |
| strigosa | 9 | 0.42 | 0.41 | 0.43 | 0.21 | 0.26 | 0.34 |
knitr::kable(shorestats97bs, row.names=F,
caption="97% within-sample OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.63 | 0.68 | 0.59 | 0.04 | 0.97 | 0.85 |
| annularis | 4 | 0.58 | 0.58 | NaN | NA | NA | NA |
| astreoides | 5 | 0.06 | 0.06 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.01 | 0.02 | 0.01 | 0.16 | 0.39 | 0.67 |
| cylindrus | 8 | 0.13 | 0.14 | 0.13 | 0.14 | 0.54 | 0.62 |
| fragum | 8 | 0.27 | 0.33 | 0.22 | 0.09 | 0.48 | 0.79 |
| furcata | 9 | 0.71 | 0.56 | 0.84 | 0.36 | 0.81 | 0.05 |
| radians | 10 | 0.10 | 0.10 | 0.10 | 0.12 | 0.85 | 0.37 |
| siderea | 10 | 0.52 | 0.53 | 0.51 | 0.09 | 0.57 | 0.50 |
| strigosa | 9 | 0.81 | 0.76 | 0.86 | 0.23 | 0.23 | 0.08 |
knitr::kable(shorestats100, row.names=F,
caption="100% OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.64 | 0.65 | 0.64 | 0.10 | 0.57 | 0.54 |
| annularis | 4 | 0.73 | 0.73 | NaN | NA | NA | NA |
| astreoides | 5 | 0.41 | 0.41 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.45 | 0.45 | 0.46 | 0.19 | 0.95 | 0.26 |
| cylindrus | 8 | 0.43 | 0.38 | 0.48 | 0.21 | 0.03 | 0.07 |
| fragum | 8 | 0.55 | 0.57 | 0.53 | 0.13 | 0.30 | 0.55 |
| furcata | 9 | 0.70 | 0.55 | 0.81 | 0.45 | 0.39 | 0.02 |
| radians | 10 | 0.40 | 0.40 | 0.39 | 0.07 | 0.50 | 0.93 |
| siderea | 10 | 0.67 | 0.68 | 0.66 | 0.10 | 0.45 | 0.48 |
| strigosa | 9 | 0.81 | 0.76 | 0.85 | 0.22 | 0.04 | 0.11 |
Beta diversity is evaluated here as the multivariate dispersion of samples within a coral species group. Principal coordinate analysis on Bray-Curtis dissimilarities is used to calculate average distance-to-centroid values for each species group, which are then compared statistically by a permutation test. This analysis is implemented using betadisper() in the vegan package, based on Anderson (2006).
betad <- function(phy) {
# Calculate distances to species centroids for each sample in principal coordinate space
sambd <- betadisper(d=vegdist(t(otu_table(phy)), method="bray"),
group=data.frame(sample_data(phy))$Species, type="centroid",
bias.adjust=TRUE)
# Calculate each species' mean distance to centroid and standard error
sambdsumm <- aggregate(data.frame(mean=sambd$distances), by=list(group=sambd$group), FUN=mean)
sambdsumm$se <- aggregate(sambd$distances, by=list(group=sambd$group),
FUN=function(x) sd(x)/sqrt(length(x)))$x
# Determine order of decreasing mean dispersion
sambdsumm.ord <- sambdsumm[order(sambdsumm$mean, decreasing=T), ]
# Update betadisper object with species in decreasing order of mean dispersion
sambd <- betadisper(d=vegdist(t(otu_table(phy)), method="bray"),
group=factor(data.frame(sample_data(phy))$Species,
levels=as.character(sambdsumm.ord$group)),
type="centroid", bias.adjust=TRUE)
# Use permutations to perform pairwise comparisons of group mean dispersions
sampt <- permutest(sambd, pairwise=T)
saml <- multcompLetters(sampt$pairwise$permuted)
# Return results as a list
return(list(sambdsumm.ord=sambdsumm.ord, sambdsumm=sambdsumm, saml=saml))
}
betad97 <- betad(phy97.f.t)
betad97bs <- betad(phy97bs.f.t)
betad100 <- betad(phy100.f.t)
# Figure: Distance to centroids
par(mfrow=c(1,3), mar=c(3.5,4,2,1), lwd=1)
with(betad97$sambdsumm.ord, {
plot(mean, type="n", main="97% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(SAM$Species)[order(betad97$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97$saml$Letters[as.character(group)], cex=0.5)
})
with(betad97bs$sambdsumm.ord, {
plot(mean, type="n", main="97% within-sample OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(SAM$Species)[order(betad97bs$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97bs$saml$Letters[as.character(group)], cex=0.5)
})
with(betad100$sambdsumm.ord, {
plot(mean, type="n", main="100% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(SAM$Species)[order(betad100$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad100$saml$Letters[as.character(group)], cex=0.5)
})
otubarplot <- function(phy, main) {
samdat <- data.frame(sample_data(phy))
typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$Subtype),
rownames(samdat)])
typerelabund <- typerelabund[, rownames(sample_data(phy)[with(sample_data(phy),
order(Shore, Site, InputFileName))])]
# Get info for plotting OTU names and blast hits on top of barplot
blasthits <- as.character(data.frame(tax_table(phy))[order(data.frame(tax_table(phy))$Subtype), "Subtype"])
names <- as.character(rownames(data.frame(tax_table(phy)))[order(data.frame(tax_table(phy))$Subtype)])
samples <- colnames(typerelabund)
shores <- data.frame(sample_data(phy))[samples, "Shore"]
sites <- data.frame(sample_data(phy))[samples, "Site"]
divides <- apply(typerelabund, 2, cumsum)
heights <- diff(divides)
heights[heights < 0.04] <- NA
# Plot barplot and OTU names and blast hits
bars <- barplot(typerelabund, col=rainbow(ntaxa(phy)), las=1, space=0.1, xaxs="i",
xlab="", ylab="", xaxt="n")
text(bars, -0.06, labels=samples, xpd=T)
text(bars, -0.10, labels=shores, xpd=T)
text(bars, -0.14, labels=substr(sites, 1, 2), xpd=T)
text(bars[1]/4, c(-0.06, -0.10, -0.14), labels=c("sample:", "shore:", "site:"), xpd=T, pos=2)
mtext(side=2, text = "Relative Abundance", line=2)
for (i in 1:length(bars)) {
text(rep(bars[i], length(heights[which(!is.na(heights[,i])),i])),
divides[which(!is.na(heights[,i])),i] + heights[which(!is.na(heights[,i])),i] / 2,
labels=paste(names[which(!is.na(heights[,i]))+1],blasthits[which(!is.na(heights[,i]))+1],sep="\n"),
cex=0.75)
}
mtext(side=3, main, xpd=T, adj=0, line=0.5, font=2)
}
# Create subsetted phyloseq objects for Diploria strigosa
dstr97.f.p <- subset_samples(phy97.f.p, Species=="strigosa")
dstr97bs.f.p <- subset_samples(phy97bs.f.p, Species=="strigosa")
dstr100.f.p <- subset_samples(phy100.f.p, Species=="strigosa")
# Plot custom barplots for Diploria strigosa
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dstr97.f.p, main="97% OTUs")
otubarplot(dstr97bs.f.p, main="97% within-sample OTUs")
otubarplot(dstr100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Porites furcata
pfur97.f.p <- subset_samples(phy97.f.p, Species=="furcata")
pfur97bs.f.p <- subset_samples(phy97bs.f.p, Species=="furcata")
pfur100.f.p <- subset_samples(phy100.f.p, Species=="furcata")
# Plot custom barplots for Porites furcata
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(pfur97.f.p, main="97% OTUs")
otubarplot(pfur97bs.f.p, main="97% within-sample OTUs")
otubarplot(pfur100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Orbicella annularis
oann97.f.p <- subset_samples(phy97.f.p, Species=="annularis")
oann97bs.f.p <- subset_samples(phy97bs.f.p, Species=="annularis")
oann100.f.p <- subset_samples(phy100.f.p, Species=="annularis")
# Plot custom barplots for Orbicella annularis
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(oann97.f.p, main="97% OTUs")
otubarplot(oann97bs.f.p, main="97% within-sample OTUs")
otubarplot(oann100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Millepora alcicornis
malc97.f.p <- subset_samples(phy97.f.p, Species=="alcicornis")
malc97bs.f.p <- subset_samples(phy97bs.f.p, Species=="alcicornis")
malc100.f.p <- subset_samples(phy100.f.p, Species=="alcicornis")
# Plot custom barplots for Millepora alcicornis
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(malc97.f.p, main="97% OTUs")
otubarplot(malc97bs.f.p, main="97% within-sample OTUs")
otubarplot(malc100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Siderastrea siderea
ssid97.f.p <- subset_samples(phy97.f.p, Species=="siderea")
ssid97bs.f.p <- subset_samples(phy97bs.f.p, Species=="siderea")
ssid100.f.p <- subset_samples(phy100.f.p, Species=="siderea")
# Plot custom barplots for Siderastrea siderea
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ssid97.f.p, main="97% OTUs")
otubarplot(ssid97bs.f.p, main="97% within-sample OTUs")
otubarplot(ssid100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Favia fragum
ffra97.f.p <- subset_samples(phy97.f.p, Species=="fragum")
ffra97bs.f.p <- subset_samples(phy97bs.f.p, Species=="fragum")
ffra100.f.p <- subset_samples(phy100.f.p, Species=="fragum")
# Plot custom barplots for Favia fragum
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ffra97.f.p, main="97% OTUs")
otubarplot(ffra97bs.f.p, main="97% within-sample OTUs")
otubarplot(ffra100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Siderastrea radians
srad97.f.p <- subset_samples(phy97.f.p, Species=="radians")
srad97bs.f.p <- subset_samples(phy97bs.f.p, Species=="radians")
srad100.f.p <- subset_samples(phy100.f.p, Species=="radians")
# Plot custom barplots for Siderastrea radians
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(srad97.f.p, main="97% OTUs")
otubarplot(srad97bs.f.p, main="97% within-sample OTUs")
otubarplot(srad100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Porites astreoides
past97.f.p <- subset_samples(phy97.f.p, Species=="astreoides")
past97bs.f.p <- subset_samples(phy97bs.f.p, Species=="astreoides")
past100.f.p <- subset_samples(phy100.f.p, Species=="astreoides")
# Plot custom barplots for Porites astreoides
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(past97.f.p, main="97% OTUs")
otubarplot(past97bs.f.p, main="97% within-sample OTUs")
otubarplot(past100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Dendrogyra cylindrus
dcyl97.f.p <- subset_samples(phy97.f.p, Species=="cylindrus")
dcyl97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cylindrus")
dcyl100.f.p <- subset_samples(phy100.f.p, Species=="cylindrus")
# Plot custom barplots for Dendrogyra cylindrus
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dcyl97.f.p, main="97% OTUs")
otubarplot(dcyl97bs.f.p, main="97% within-sample OTUs")
otubarplot(dcyl100.f.p, main="100% OTUs")
# Create subsetted phyloseq objects for Montastraea cavernosa
mcav97.f.p <- subset_samples(phy97.f.p, Species=="cavernosa")
mcav97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cavernosa")
mcav100.f.p <- subset_samples(phy100.f.p, Species=="cavernosa")
# Plot custom barplots for Montastraea cavernosa
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mcav97.f.p, main="97% OTUs")
otubarplot(mcav97bs.f.p, main="97% within-sample OTUs")
otubarplot(mcav100.f.p, main="100% OTUs")
make.net <- function(physeq, n) {
# Remove taxa that are not present in subset
physeq <- prune_taxa(rowSums(otu_table(physeq))!=0, physeq)
# Convert OTU table to "edges" table (weight = relative abundance)
otudf <- data.frame(t(otu_table(physeq)), check.names=F)
edges <- setNames(melt(as.matrix(otudf)), nm=c("source", "target", "weight")) # Melt data
edges <- edges[edges$weight>n, ]
# Create "nodes" table from sample and tax data
sdf <- data.frame(sample_data(physeq))
sdf$id <- rownames(sdf)
tdf <- data.frame(tax_table(physeq))
tdf$id <- rownames(tdf)
nodes <- merge(sdf, tdf, all=T)
# Create network
net <- graph_from_data_frame(d=edges, vertices=nodes, directed=F)
return(net)
}
dstr.net <- make.net(dstr97bs.f.p, 0)
set.seed(5453890)
plot(dstr.net,
edge.curved=0.1,
edge.width=10*(E(dstr.net)$weight)^0.2,
vertex.label=substr(V(dstr.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(dstr.net)$Clade, levels=c("A","B","C","D","F","G"))])
pfur.net <- make.net(pfur97bs.f.p, 0)
plot(pfur.net,
edge.curved=0.1,
edge.width=10*(E(pfur.net)$weight)^0.2,
vertex.label=substr(V(pfur.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(pfur.net)$Clade, levels=c("A","B","C","D","F","G"))])
oann.net <- make.net(oann97bs.f.p, 0)
plot(oann.net,
edge.curved=0.1,
edge.width=10*(E(oann.net)$weight)^0.2,
vertex.label=substr(V(oann.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(oann.net)$Clade, levels=c("A","B","C","D","F","G"))])
malc.net <- make.net(malc97bs.f.p, 0)
plot(malc.net,
edge.curved=0.1,
edge.width=10*(E(malc.net)$weight)^0.2,
vertex.label=substr(V(malc.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(malc.net)$Clade, levels=c("A","B","C","D","F","G"))])
set.seed(9147)
ssid.net <- make.net(ssid97bs.f.p, 0)
plot(ssid.net,
edge.curved=0.1,
edge.width=10*(E(ssid.net)$weight)^0.2,
vertex.label=substr(V(ssid.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(ssid.net)$Clade, levels=c("A","B","C","D","F","G"))])
ffra.net <- make.net(ffra97bs.f.p, 0)
plot(ffra.net,
edge.curved=0.1,
edge.width=10*(E(ffra.net)$weight)^0.2,
vertex.label=substr(V(ffra.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(ffra.net)$Clade, levels=c("A","B","C","D","F","G"))])
srad.net <- make.net(srad97bs.f.p, 0)
plot(srad.net,
edge.curved=0.1,
edge.width=10*(E(srad.net)$weight)^0.2,
vertex.label=substr(V(srad.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(srad.net)$Clade, levels=c("A","B","C","D","F","G"))])
past.net <- make.net(past97bs.f.p, 0)
plot(past.net,
edge.curved=0.1,
edge.width=10*(E(past.net)$weight)^0.2,
vertex.label=substr(V(past.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(past.net)$Clade, levels=c("A","B","C","D","F","G"))])
dcyl.net <- make.net(dcyl97bs.f.p, 0)
plot(dcyl.net,
edge.curved=0.1,
edge.width=10*(E(dcyl.net)$weight)^0.2,
vertex.label=substr(V(dcyl.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(dcyl.net)$Clade, levels=c("A","B","C","D","F","G"))])
mcav.net <- make.net(mcav97bs.f.p, 0)
plot(mcav.net,
edge.curved=0.1,
edge.width=10*(E(mcav.net)$weight)^0.2,
vertex.label=substr(V(mcav.net)$Subtype, 1, 4),
vertex.color=taxcolors[factor(V(mcav.net)$Clade, levels=c("A","B","C","D","F","G"))])
# Compute node degrees (#links) and use that to set node size:
deg <- degree(mcav.net, mode="all")
V(mcav.net)$size <- 20
plot(mcav.net, layout=layout_with_fr(mcav.net))
# Get network with one node for each species.
# Edge weight is the mean relative abundance of OTU.
# Convert OTU table to "edges" table (weight = relative abundance)
otudf <- data.frame(t(otu_table(phy97bs.f.p)), check.names=F)
# Average relative abundance of OTU within species
#sp.ag <- aggregate(otudf, by=list(Species=data.frame(sample_data(phy97.f.p))$Species), FUN=mean)
# Percent of samples of species that contain OTU at more than 1% relative abundance
sp.ag <- aggregate(otudf, by=list(Species=data.frame(sample_data(phy97bs.f.p))$Species),
FUN=function(x) length(x[x>0.01])/length(x))
rownames(sp.ag) <- sp.ag$Species
sp.ag <- sp.ag[, -1]
edges <- setNames(melt(as.matrix(sp.ag)), nm=c("source", "target", "weight")) # Melt data
edges <- droplevels(edges[edges$weight>0, ])
# Create "nodes" table from sample and tax data
sdf <- data.frame(id=levels(data.frame(sample_data(phy97bs.f.p))$Species))
tdf <- data.frame(tax_table(phy97bs.f.p))[rownames(data.frame(tax_table(phy97bs.f.p))) %in% edges$target, ]
tdf$id <- rownames(tdf)
nodes <- merge(sdf, tdf, all=T)
nodes$type <- ifelse(is.na(nodes$Clade), 1, 2)
# Create network
net <- graph_from_data_frame(d=edges, vertices=nodes, directed=F)
# Best one for now ---------
V(net)$shape <- c("square", "circle")[factor(V(net)$type)]
V(net)$size <- ifelse(is.na(V(net)$Clade), 15, 10*sqrt(degree(net))) #5*sqrt(degree(net))
V(net)$label <- ifelse(is.na(V(net)$Clade), names(V(net)),
unlist(lapply(strsplit(V(net)$Subtype, split="_"), "[", 1)))
V(net)$label <- str_replace_all(V(net)$label, c("alcicornis"="M.alc.", "annularis"="O.ann",
"astreoides"="P.ast.", "cavernosa"="M.cav.",
"cylindrus"="D.cyl.", "fragum"="F.fra.",
"furcata"="P.fur.", "radians"="S.rad.",
"siderea"="S.sid.", "strigosa"="D.str."))
V(net)$color <- ifelse(is.na(V(net)$Clade), "white",
taxcolors[factor(V(net)$Clade, levels=c("A","B","C","D","F","G"))])
E(net)$color <- "gray60"
E(net)$width <- 15 * E(net)$weight
set.seed(12347)
set.seed(12364)
set.seed(12374)
l <- layout.fruchterman.reingold(net)
l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1)
par(mar=c(0,0,0,0))
plot(net, rescale=F, layout=l*1, edge.curved=0.1, vertex.label.cex=V(net)$size/20,
vertex.label.color="black")
#legend(x=-1.05, y=1.1, c("CladeA", "CladeB", "CladeC", "CladeD", "CladeF", "CladeG"), pch=21,
# col="#777777", pt.bg=taxcolors, pt.cex=1.1, cex=.6, bty="n", ncol=2, text.width=0.2)